# library import
from utils import *
# RepBase reference library: parse the multi-FASTA subset, pass the parsed
# records to length_hist under the label "RepBase", then call Stats on them.
# The table recorded right after this cell is the Stats output (number of
# consensi, longest/shortest/average/median length, and per-class counts).
# NOTE(review): parse_repbase, length_hist and Stats all come from the
# wildcard import of utils; their return types are not visible here.
# RepBase uses its own parser (parse_repbase); the other libraries below go
# through cons_parser.
data = parse_repbase("data/RepBase_subsetv2.multifasta")
length_hist(data, "RepBase")  # presumably plots the sequence-length histogram; confirm in utils
Stats(data)  # kept last so its result/printout is the cell's output
| Number of consensi: | 17407 |
| Longest sequence (pb): | 45000 |
| Shortest sequence (pb): | 40 |
| Average length (pb): | 2998.0 |
| Median length (pb): | 2525.0 |
| LTR | 7735 |
| DNA | 5621 |
| LINE | 2347 |
| non_LTR | 1224 |
| UNKNOWN | 177 |
| SINE | 144 |
| SAT | 121 |
| MSAT | 29 |
| pseudogene | 7 |
| simple | 2 |
# RepeatModeler2 consensus library: parse the classified FASTA with
# cons_parser, pass the result to length_hist under the label
# "RepeatModeler2", then call Stats on it. The table recorded right after
# this cell is the Stats output for this library.
# NOTE(review): the name seq_dict suggests cons_parser returns a dict of
# sequences; confirm against utils.
seq_dict = cons_parser("data/consensi/RM2_consensi.fa.classified")
length_hist(seq_dict, "RepeatModeler2")  # presumably plots the length histogram; confirm in utils
Stats(seq_dict)  # kept last so its result/printout is the cell's output
| Number of consensi: | 15384 |
| Longest sequence (pb): | 14783 |
| Shortest sequence (pb): | 29 |
| Average length (pb): | 2262.3 |
| Median length (pb): | 773.0 |
| LTR | 12565 |
| Unknown | 2069 |
| LINE | 513 |
| DNA | 147 |
| RC | 34 |
| SINE | 22 |
| tRNA | 14 |
| rRNA | 8 |
| Simple_repeat | 7 |
| Satellite | 5 |
# EDTA consensus library: same parse / length_hist / Stats sequence as for
# the other libraries, labelled "EDTA".
# This rebinds seq_dict, so whatever was previously stored under that name
# (the RepeatModeler2 parse, if that cell ran first) is no longer reachable
# through it.
seq_dict = cons_parser("data/consensi/EDTA_consensi.fa")
length_hist(seq_dict, "EDTA")  # presumably plots the length histogram; confirm in utils
Stats(seq_dict)  # kept last so its result/printout is the cell's output
| Number of consensi: | 16191 |
| Longest sequence (pb): | 16685 |
| Shortest sequence (pb): | 80 |
| Average length (pb): | 1971.9 |
| Median length (pb): | 1109.0 |
| LTR | 10166 |
| DNA | 4725 |
| MITE | 1300 |
# MITE-Tracker consensus library: same parse / length_hist / Stats sequence
# as for the other libraries, labelled "MITE-Tracker".
# NOTE(review): the result of cons_parser is bound to df here but to
# seq_dict in the RepeatModeler2 and EDTA cells; the name df hints at a
# DataFrame, which is probably misleading. Rename only after checking that
# no later cell reads df.
df = cons_parser("data/consensi/MITE_consensi.fa")
length_hist(df, "MITE-Tracker")  # presumably plots the length histogram; confirm in utils
Stats(df)  # kept last so its result/printout is the cell's output
| Number of consensi: | 10863 |
| Longest sequence (pb): | 800 |
| Shortest sequence (pb): | 49 |
| Average length (pb): | 289.4 |
| Median length (pb): | 235.0 |
| MITE | 10863 |
For EDTA and RepeatModeler2:
For MITE-Tracker:
Read coverage of the consensus sequences, for each library:
# Coverage histograms, one reads_hist call per library, in the same order as
# the libraries were summarised above. Each row is
# (.sam file, consensus FASTA, library name) and is passed positionally.
# Names are underscore-prefixed so they cannot clash with anything pulled in
# by the wildcard import of utils.
_coverage_inputs = (
    (
        "data/sam/RepBase_coverage.sam",
        "data/RepBase_subsetv2.multifasta",
        "RepBase",
    ),
    (
        "data/sam/RM2_coverage.sam",
        "data/consensi/RM2_consensi.fa.classified",
        "RepeatModeler2",
    ),
    (
        "data/sam/EDTA_coverage.sam",
        "data/consensi/EDTA_consensi.fa",
        "EDTA",
    ),
    (
        "data/sam/MITE_coverage.sam",
        "data/consensi/MITE_consensi.fa",
        "MITE-Tracker",
    ),
)
for _sam_path, _fasta_path, _library in _coverage_inputs:
    reads_hist(_sam_path, _fasta_path, _library)